# NOTE(review): rm(list=ls()) clears only the global environment (not loaded
# packages, options, or par settings) and is discouraged in scripts; prefer
# restarting R for a clean session.
rm(list=ls())
library(ggplot2)
library(tidyr)
library(predictrace)
library(rethnicity)
## ══ WARNING! ════════════════════════════════════════════════════════════════════
## The method provided by this package has its limitations and anyone must use
## them cautiously and responsibly. You should also agree that you only intend to
## use the method for academic research purpose and not for commercial use. You
## would also agree NOT to discriminate anyone based on race or color or any
## characteristic, with the information provided by this package. Please refer to
## the documentation for details:
## https://fangzhou-xie.github.io/rethnicity/index.html
## ════════════════════════════════════════════════════════════════════════════════
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(grf)
library(rpart)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-4
library(splines)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.7 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ psych::alpha() masks ggplot2::alpha()
## ✖ Matrix::expand() masks tidyr::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ Matrix::pack() masks tidyr::pack()
## ✖ MASS::select() masks dplyr::select()
## ✖ Matrix::unpack() masks tidyr::unpack()
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 0.2.0 ──
## ✔ broom 0.8.0 ✔ rsample 0.1.1
## ✔ dials 0.1.1 ✔ tune 0.2.0
## ✔ infer 1.0.0 ✔ workflows 0.2.6
## ✔ modeldata 0.1.1 ✔ workflowsets 0.2.1
## ✔ parsnip 0.2.1 ✔ yardstick 0.0.9
## ✔ recipes 0.2.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ psych::%+%() masks ggplot2::%+%()
## ✖ scales::alpha() masks psych::alpha(), ggplot2::alpha()
## ✖ scales::discard() masks purrr::discard()
## ✖ Matrix::expand() masks tidyr::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ Matrix::pack() masks tidyr::pack()
## ✖ dials::prune() masks rpart::prune()
## ✖ MASS::select() masks dplyr::select()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## ✖ Matrix::unpack() masks tidyr::unpack()
## ✖ recipes::update() masks Matrix::update(), stats::update()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org
library(fastDummies)
library(xtable)
# Auxiliary function to compute adjusted p-values
# following the Romano-Wolf stepdown method.
# For a reference, see http://ftp.iza.org/dp12845.pdf page 8
# t.orig: vector of t-statistics from original model (length S)
# t.boot: matrix of t-statistics from bootstrapped models (num draws x S)
# Returns: vector of adjusted p-values, in the same order as t.orig.
romano_wolf_correction <- function(t.orig, t.boot) {
abs.t.orig <- abs(t.orig)
abs.t.boot <- abs(t.boot)
# Step down from the largest original |t| to the smallest.
abs.t.sorted <- sort(abs.t.orig, decreasing = TRUE)
max.order <- order(abs.t.orig, decreasing = TRUE)
rev.order <- order(max.order)  # undoes the sorting on return
S <- ncol(t.boot)
p.adj <- rep(0, S)
# Largest statistic is compared against the row-wise max over ALL statistics.
p.adj[1] <- mean(apply(abs.t.boot, 1, max) > abs.t.sorted[1])
# Fix: the original `for (s in seq(2, S))` counts DOWN (2, 1) when S == 1,
# indexing abs.t.sorted[2] / max.order[2] out of bounds. Only loop for S >= 2.
if (S >= 2) {
for (s in 2:S) {
# The max is restricted to the not-yet-tested (smaller) statistics.
cur.index <- max.order[s:S]
p.init <- mean(apply(abs.t.boot[, cur.index, drop=FALSE], 1, max) > abs.t.sorted[s])
# Enforce monotonicity of the stepdown p-values.
p.adj[s] <- max(p.init, p.adj[s-1])
}
}
p.adj[rev.order]
}
# Computes adjusted p-values for linear regression (lm) models.
# model: object of lm class (i.e., a linear reg model)
# indices: vector of integers for the coefficients that will be tested
#          (default: all coefficients)
# cov.type: type of standard error (to be passed to sandwich::vcovHC)
# num.boot: number of null bootstrap samples. Increase to stabilize across runs.
# Returns: matrix with estimate, std. error, original and RW-adjusted p-values.
# Note: results are probabilistic and may change slightly at every run.
#
# Adapted from the p_adjust from the hdm package, written by Philipp Bach.
# https://github.com/PhilippBach/hdm_prev/blob/master/R/p_adjust.R
summary_rw_lm <- function(model, indices=NULL, cov.type="HC2", num.boot=10000) {
if (is.null(indices)) {
indices <- seq_len(nrow(coef(summary(model))))
}
# Grab the original t values.
summary <- coef(summary(model))[indices,,drop=FALSE]
t.orig <- summary[, "t value"]
# Null resampling.
# This is a trick to speed up bootstrapping linear models.
# Here, we don't really need to re-fit linear regressions, which would be a bit slow.
# We know that betahat ~ N(beta, Sigma), and we have an estimate Sigmahat.
# So we can approximate "null t-values" by
# - Draw beta.boot ~ N(0, Sigma-hat) --- note the 0 here, this is what makes it a *null* t-value.
# - Compute t.boot = beta.boot / sqrt(diag(Sigma.hat))
# Fix: drop=FALSE keeps Sigma.hat a matrix when only one index is tested;
# without it, [indices, indices] collapses to a scalar and mvrnorm() errors.
Sigma.hat <- vcovHC(model, type=cov.type)[indices, indices, drop=FALSE]
se.orig <- sqrt(diag(Sigma.hat))
num.coef <- length(se.orig)
beta.boot <- mvrnorm(n=num.boot, mu=rep(0, num.coef), Sigma=Sigma.hat)
t.boot <- sweep(beta.boot, 2, se.orig, "/")
p.adj <- romano_wolf_correction(t.orig, t.boot)
# Keep estimate, SE and original p-value columns; append adjusted p-values.
result <- cbind(summary[,c(1,2,4),drop=FALSE], p.adj)
colnames(result) <- c('Estimate', 'Std. Error', 'Orig. p-value', 'Adj. p-value')
result
}
# Load the raw per-year data. Paths are relative to the parent directory.
# NOTE(review): setwd(getwd()) is a no-op, kept from the original notebook.
setwd(getwd())
ICLR_2018 = read.csv("../ICLR_2018.csv")
ICLR_2019 = read.csv("../ICLR_2019.csv")
ICLR_2017_authors = read.csv("../ICLR_2017_author.csv")
ICLR_2017_conversations = read.csv("../iclr2017_conversations.csv")
# Keep only the first 14 columns of the 2018/2019 dumps.
ICLR_2018 = ICLR_2018[c(names(ICLR_2018)[1:14])]
ICLR_2019 = ICLR_2019[c(names(ICLR_2019)[1:14])]
# Stack (year, author name, paper id) rows across the three conference years.
authors = rbind(ICLR_2017_authors[c('year','authors','forum')],ICLR_2018[c('year','authors','forum')],ICLR_2019[c('year','authors','forum')])
authors = unique(authors)
authors = as.data.frame(authors)
# Split "First Rest-of-name" into first/last name; rows that don't match the
# pattern (e.g. single-token names) become NA and are dropped below.
authors = extract(authors,authors,c("First_Name","Last_Name"), "([^ ]+) (.*)")
authors = na.omit(authors)
# Predicted race from rethnicity's fullname model; column 7 is kept
# (presumably the predicted label — confirm against the rethnicity docs).
authors['race'] = predict_ethnicity(firstnames = authors$First_Name, lastnames = authors$Last_Name, method = "fullname")[7]
# Unique submission (forum) counts per year, for the plot below.
yvalue = c(length(unique(ICLR_2017_authors$forum)),length(unique(ICLR_2018$forum)), length(unique(ICLR_2019$forum)))
xvalue = as(c(2017,2018,2019), 'integer')
number_of_papers = data.frame(xvalue,yvalue)
yvalue
## [1] 490 911 1419
# Plot yearly submission counts.
# Fix: year is numeric, so use a continuous x scale with explicit breaks.
# The original scale_x_discrete(limit=...) emitted "Continuous limits
# supplied to discrete scale" and was ignored by ggplot2.
ggplot(number_of_papers, aes(x=xvalue, y=yvalue, color = xvalue)) +
  geom_line() +
  geom_point(size = yvalue/150, alpha= 0.5) +
  ggtitle("Number of Submissions to ICLR 2017 - 2019") +
  labs(x = "Conference Year", y = "Number of Submission") +
  scale_x_continuous(breaks = c(2017, 2018, 2019)) +
  # Label each point with its count, nudged off the marker.
  geom_text(
    label=yvalue,
    nudge_x=0.2, nudge_y=0.1,
    check_overlap=TRUE) +
  theme(legend.position="none")

#ggsave("submission_n.png")
# Share and count of authors by predicted race, per conference year.
# .groups="drop_last" spells out the default (result grouped by year) and
# silences the summarise() message; freq is then the within-year share.
df = authors %>% group_by(year,race) %>%
  dplyr::summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = round(n / sum(n), 3))
df
# Fix: numeric x axis -> continuous scale with explicit breaks (the original
# scale_x_discrete(limit=...) warned and was dropped by ggplot2).
ggplot(df, aes(x = year, y = freq)) +
  geom_line(aes(color = race, linetype = race)) +
  scale_color_manual(values = c("darkred", "steelblue",'green','purple')) +
  ggtitle("Percent of Authors by Racial Identity from ICLR 2017 - 2019")+
  scale_x_continuous(breaks = c(2017, 2018, 2019)) +
  geom_point()

#ggsave("freq_authors.png")
# Same plot on raw counts instead of shares.
ggplot(df, aes(x = year, y = n)) +
  geom_line(aes(color = race, linetype = race)) +
  scale_color_manual(values = c("darkred", "steelblue",'green','purple')) +
  ggtitle("Number of Authors by Racial Identity from ICLR 2017 - 2019") +
  scale_x_continuous(breaks = c(2017, 2018, 2019)) +
  geom_point()

#ggsave("N_authors.png")
# Assemble one review-level table across years with numeric rating/confidence.
ICLR_2017_reviews = ICLR_2017_conversations[c("year","forum", "rating" ,"confidence" , "review" )]
# Turn empty strings into NA, then drop incomplete reviews.
ICLR_2017_reviews = ICLR_2017_reviews %>%
na_if("") %>%
na.omit
ICLR_2018_reviews = ICLR_2018[c("year" , "forum" , "rating" , "confidence", "review")]
ICLR_2019_reviews = ICLR_2019[c("year" , "forum" , "rating" , "confidence", "review")]
ICLR_reviews = rbind(ICLR_2017_reviews,ICLR_2018_reviews,ICLR_2019_reviews)
# Rating/confidence strings appear to be "<number>: <description>";
# keep the leading number only.
ICLR_reviews['rating_number'] = as(sub(":.*","",ICLR_reviews$rating),'numeric')
ICLR_reviews['confidence_number'] = as(sub(":.*","",ICLR_reviews$confidence),'numeric')
# Review length in characters, bucketed into quartiles (1-4).
ICLR_reviews['review_length'] = nchar(ICLR_reviews$review)
ICLR_reviews = ICLR_reviews %>% mutate(review_length_quantile = ntile(review_length, 4))
ICLR_reviews = unique(ICLR_reviews)
# LaTeX table of per-year averages.
xtable(ICLR_reviews %>% group_by(year) %>%
summarise(rating_average = mean(`rating_number`, na.rm=T), expert_average = mean(`confidence_number`, na.rm=T), review_length_average = mean(`review_length`, na.rm=T)))
# Per-year rating mean and SD, then plot with +/- 1 SD error bars.
ICLR_summary_stats = ICLR_reviews %>% group_by(year) %>%
  summarise(rating_average = mean(`rating_number`, na.rm=TRUE), expert_average = mean(`confidence_number`, na.rm=TRUE), review_length_average = mean(`review_length`, na.rm=TRUE), rating_sd = sd(`rating_number`, na.rm = TRUE))
# Fix: year is numeric, so use a continuous x scale with explicit breaks;
# the original scale_x_discrete(limit=...) warned and was ignored.
ggplot(ICLR_summary_stats, aes(x=year, y=rating_average)) + geom_point() +
  geom_errorbar(aes(ymin=rating_average-rating_sd, ymax=rating_average+rating_sd), width=.2,
                position=position_dodge(0.05)) +
  scale_x_continuous(breaks = c(2017, 2018, 2019)) +
  # Dotted line marks the 2017 -> 2018 boundary used as the treatment split.
  geom_vline(xintercept = 2017.5, linetype="dotted",
             color = "red", size=1.5) +
  geom_line() +
  ggtitle("Average Review Score from ICLR 2017 - 2019")

#ggsave("ratings.png")
# Build the author-review-level analysis table (2017 vs 2018 reviews).
# Fix: guard rm() so it no longer warns when the object does not exist yet.
if (exists("ICLR_analysis_ARI")) rm(ICLR_analysis_ARI)
ICLR_analysis_ARI = ICLR_reviews %>% filter(year %in% c(2017,2018))
# One row per (review, author): each review is joined to every author of the
# paper, so papers with more authors contribute more rows.
ICLR_analysis_ARI = full_join(x=ICLR_analysis_ARI,y=authors,by="forum")
#audit data
#ICLR_reviews %>% filter(forum == 'B1-q5Pqxl')
#ICLR_analysis_ARI %>% filter(forum == 'B1-q5Pqxl')
#authors %>% filter(forum == 'B1-q5Pqxl')
# Treatment W: 0 for 2017 reviews, 1 for 2018 reviews (year.x = review year).
ICLR_analysis_ARI = ICLR_analysis_ARI %>% mutate(W = if_else(year.x==2017,0,1))
ICLR_analysis_ARI = na.omit(ICLR_analysis_ARI)
ICLR_analysis_ARI = unique(ICLR_analysis_ARI)
# Sanity checks: group sizes and extremely long reviews.
ICLR_analysis_ARI%>% group_by(W) %>% summarise(n())
ICLR_analysis_ARI %>% filter(review_length >= 10000) %>% summarise(n())
# Compare review-length distributions between treated (W=1, 2018) and
# control (W=0, 2017) reviews.
treated_review_length = ICLR_analysis_ARI %>% filter(W == 1) %>% dplyr::select(review_length,review_length_quantile)
treated_review_length$treatment = 'Treated'
control_review_length = ICLR_analysis_ARI %>% filter(W == 0) %>% dplyr::select(review_length,review_length_quantile)
control_review_length$treatment = 'Control'
ICLR_review_length = rbind(control_review_length, treated_review_length)
# Overlaid histograms: raw character counts, then the quartile version.
ggplot(ICLR_review_length, aes(x=review_length,fill= treatment)) + geom_histogram(alpha = .2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(ICLR_review_length, aes(x=review_length_quantile,fill= treatment)) + geom_histogram(alpha = .2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#names(ICLR_analysis)
#c("rating","confidence","review","rating number","confidence number","review length","First_Name","Last_Name","race")
#remove race when doing model comparison with g
#covariates = c("confidence_number","review_length_quantile")
# Causal forest: effect of review year (2017 vs 2018) on rating, adjusting
# for reviewer confidence, review-length quartile and predicted author race.
covariates = c("confidence_number","review_length_quantile","race")
XX <- model.matrix(formula(paste0("~", paste0(covariates, collapse="+"))), data=ICLR_analysis_ARI)
set.seed(1)
forest <- causal_forest(
X=XX,
W=ICLR_analysis_ARI[,"W"],
Y=ICLR_analysis_ARI[,"rating_number"]
)
# Doubly-robust ATE estimate from the forest.
forest.ate <- average_treatment_effect(forest)
forest.ate
## estimate std.err
## -0.24751592 0.02620021
# Overlap check: estimated propensity scores should stay away from 0 and 1.
hist(forest$W.hat, main="Estimated propensity scores \n(causal forest with interaction data)", xlim=c(-.1, 1.1))

print(forest)
## GRF forest object of type causal_forest
## Number of trees: 2000
## Number of training samples: 16066
## Variable importance:
## 1 2 3 4 5 6
## 0.000 0.372 0.292 0.079 0.191 0.066
# AIPW (doubly-robust) ATE with cross-fitted glmnet nuisance models built on
# spline-transformed covariates.
# Available in randomized settings and observational settings with unconfoundedness+overlap
covariates = c("confidence_number","review_length_quantile")
n <- nrow(ICLR_analysis_ARI)
n.folds <- 5
# A list of vectors indicating the left-out subset
indices <- split(seq(n), sort(seq(n) %% n.folds))
treatment <- "W"
# Preparing data
W <- ICLR_analysis_ARI[,"W"]
Y <- ICLR_analysis_ARI[,"rating_number"]
# Matrix of (transformed) covariates used to estimate E[Y|X,W]
fmla.xw <- formula(paste("~ 0 +", paste0("bs(", covariates, ", df=3)", "*", treatment, collapse=" + ")))
XW <- model.matrix(fmla.xw, ICLR_analysis_ARI)
# Matrix of (transformed) covariates used to predict E[Y|X,W=w] for each w in {0, 1}
data.1 <- ICLR_analysis_ARI
data.1[,treatment] <- 1
XW1 <- model.matrix(fmla.xw, data.1) # setting W=1
data.0 <- ICLR_analysis_ARI
data.0[,treatment] <- 0
XW0 <- model.matrix(fmla.xw, data.0) # setting W=0
# Matrix of (transformed) covariates used to estimate and predict e(X) = P[W=1|X]
fmla.x <- formula(paste(" ~ 0 + ", paste0("bs(", covariates, ", df=3)", collapse=" + ")))
XX <- model.matrix(fmla.x, ICLR_analysis_ARI)
# (Optional) Not penalizing the main effect (the coefficient on W)
penalty.factor <- rep(1, ncol(XW))
penalty.factor[colnames(XW) == treatment] <- 0
# Cross-fitted estimates of E[Y|X,W=1], E[Y|X,W=0] and e(X) = P[W=1|X]
mu.hat.1 <- rep(NA, n)
mu.hat.0 <- rep(NA, n)
e.hat <- rep(NA, n)
for (idx in indices) {
  # Estimate outcome model and propensity models
  # Note how cross-validation is done (via cv.glmnet) within cross-fitting!
  outcome.model <- cv.glmnet(x=XW[-idx,], y=Y[-idx], family="gaussian", penalty.factor=penalty.factor)
  propensity.model <- cv.glmnet(x=XX[-idx,], y=W[-idx], family="binomial")
  # Predict with cross-fitting
  mu.hat.1[idx] <- predict(outcome.model, newx=XW1[idx,], type="response")
  mu.hat.0[idx] <- predict(outcome.model, newx=XW0[idx,], type="response")
  e.hat[idx] <- predict(propensity.model, newx=XX[idx,], type="response")
}
# Compute the summand in the AIPW estimator
aipw.scores <- (mu.hat.1 - mu.hat.0
+ W / e.hat * (Y - mu.hat.1)
- (1 - W) / (1 - e.hat) * (Y - mu.hat.0))
# Tally up results
ate.aipw.est <- mean(aipw.scores)
ate.aipw.se <- sd(aipw.scores) / sqrt(n)
ate.aipw.tstat <- ate.aipw.est / ate.aipw.se
# Fix: the two-sided normal p-value is 2*pnorm(-|t|). The original computed
# 2*pnorm(1 - |t|), which is not a tail probability.
ate.aipw.pvalue <- 2 * pnorm(-abs(ate.aipw.tstat))
ate.aipw.results <- c(estimate=ate.aipw.est, std.error=ate.aipw.se, t.stat=ate.aipw.tstat, pvalue=ate.aipw.pvalue)
print(ate.aipw.results)
## (previous run, with the old p-value formula:)
## estimate std.error t.stat pvalue
## -1.071005e+00 4.823925e-02 -2.220194e+01 9.163442e-100
# Jittered scatterplot of the two covariates, split by treatment status.
# Column order follows the formula: X[,1]=review_length_quantile,
# X[,2]=confidence_number.
X <- model.matrix(formula("~ 0 + review_length_quantile + confidence_number"), ICLR_analysis_ARI)
W <- ICLR_analysis_ARI$W
Y <- ICLR_analysis_ARI$rating_number
#png(file="ARI_covariates_confidence_review_len.png")
par(mfrow=c(1,2))
for (w in c(0, 1)) {
  # Fix: axis labels were swapped relative to the plotted columns
  # (x is review_length_quantile, y is confidence_number).
  # NOTE(review): ifelse(Y, ...) coerces the numeric rating to logical, so
  # any nonzero rating gets the same symbol/color — confirm intent.
  plot(X[W==w,1] + rnorm(n=sum(W==w), sd=.1), X[W==w,2] + rnorm(n=sum(W==w), sd=.1),
       pch=ifelse(Y, 23, 21), cex=1, col=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)),
       bg=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)), main=ifelse(w, "Treated", "Untreated"), xlab="review_length_quantile", ylab="confidence_number")
}

# Jittered scatterplots of covariate pairs involving race, by treatment.
# NOTE(review): race is a factor, so the intercept-free model.matrix expands
# it into one dummy column per level — X[,2] here is a single race dummy,
# not a "race" axis; confirm the intended plot.
# NOTE(review): ifelse(Y, ...) coerces the numeric rating to logical, so any
# nonzero rating gets the same symbol/color — likely a binary-outcome
# template leftover; confirm intent.
X <- model.matrix(formula("~ 0 + confidence_number + race"), ICLR_analysis_ARI)
W <- ICLR_analysis_ARI$W
Y <- ICLR_analysis_ARI$rating_number
par(mfrow=c(1,2))
for (w in c(0, 1)) {
plot(X[W==w,1] + rnorm(n=sum(W==w), sd=.1), X[W==w,2] + rnorm(n=sum(W==w), sd=.1),
pch=ifelse(Y, 23, 21), cex=1, col=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)),
bg=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)), main=ifelse(w, "Treated", "Untreated"), xlab="confidence_number", ylab="race")
}

# Same plot for review-length quartile vs the first race dummy.
X <- model.matrix(formula("~ 0 + review_length_quantile + race"), ICLR_analysis_ARI)
W <- ICLR_analysis_ARI$W
Y <- ICLR_analysis_ARI$rating_number
par(mfrow=c(1,2))
for (w in c(0, 1)) {
plot(X[W==w,1] + rnorm(n=sum(W==w), sd=.1), X[W==w,2] + rnorm(n=sum(W==w), sd=.1),
pch=ifelse(Y, 23, 21), cex=1, col=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)),
bg=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)), main=ifelse(w, "Treated", "Untreated"), xlab="review_length_quantile", ylab="race")
}

# Heterogeneous effects: average treatment effect by predicted author race,
# estimated from AIPW scores built out of causal-forest nuisance estimates.
set.seed(1)
group = 'race'
covariates = c("confidence_number","review_length_quantile", "race")
fmla <- formula(paste0("~ 0 +", paste0(covariates, collapse="+")))
XX <- model.matrix(fmla, ICLR_analysis_ARI)
W=ICLR_analysis_ARI[,"W"]
Y=ICLR_analysis_ARI[,"rating_number"]
forest.tau <- causal_forest(XX, Y, W)
tau.hat <- predict(forest.tau)$predictions
m.hat <- forest.tau$Y.hat # E[Y|X] estimates
e.hat <- forest.tau$W.hat # e(X) := E[W|X] estimates (or known quantity)
# NOTE(review): this overwrites the predict() result above with the forest's
# stored in-sample predictions — confirm both refer to the same quantity.
tau.hat <- forest.tau$predictions # tau(X) estimates
# Predicting mu.hat(X[i], 1) and mu.hat(X[i], 0) for obs in held-out sample
# Note: to understand this, read equations 6-8 in this vignette
# https://grf-labs.github.io/grf/articles/muhats.html
mu.hat.0 <- m.hat - e.hat * tau.hat # E[Y|X,W=0] = E[Y|X] - e(X)*tau(X)
mu.hat.1 <- m.hat + (1 - e.hat) * tau.hat # E[Y|X,W=1] = E[Y|X] + (1 - e(X))*tau(X)
# Compute AIPW scores
aipw.scores <- tau.hat + W / e.hat * (Y - mu.hat.1) - (1 - W) / (1 - e.hat) * (Y - mu.hat.0)
# Estimate average treatment effect conditional on group membership
fmla <- formula(paste0('aipw.scores ~ factor(', group, ')'))
ols <- lm(fmla, data=transform(ICLR_analysis_ARI[covariates], aipw.scores=aipw.scores))
# Romano-Wolf adjusted p-values (multiple testing across race groups).
summary_rw_lm(ols)
## Estimate Std. Error Orig. p-value Adj. p-value
## (Intercept) -0.1779189 0.03573403 6.458432e-07 0.0001
## factor(race)black -0.1657387 0.09321692 7.542458e-02 0.1439
## factor(race)hispanic -0.2720958 0.09217297 3.161659e-03 0.0083
## factor(race)white -0.1028437 0.06165488 9.532431e-02 0.1439
# Omnibus check for the presence of heterogeneity in the forest.
test_calibration(forest.tau)
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 1.00069 0.10300 9.7157 < 2.2e-16 ***
## differential.forest.prediction 0.75081 0.11220 6.6914 1.142e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Best linear projection of the CATE on the covariates.
best_linear_projection(forest.tau,ICLR_analysis_ARI[,covariates])
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.73752719 0.38557259 1.9128 0.055790 .
## confidence_number2 -0.61918346 0.40394541 -1.5328 0.125335
## confidence_number3 -0.97196064 0.38948874 -2.4955 0.012589 *
## confidence_number4 -0.91813845 0.38935204 -2.3581 0.018380 *
## confidence_number5 -1.18820584 0.39512547 -3.0072 0.002641 **
## review_length_quantile2 0.20978171 0.06898097 3.0412 0.002361 **
## review_length_quantile3 -0.05161598 0.07167919 -0.7201 0.471476
## review_length_quantile4 -0.00036196 0.07979586 -0.0045 0.996381
## raceblack -0.15495465 0.09417114 -1.6455 0.099895 .
## racehispanic -0.26695208 0.09157396 -2.9152 0.003560 **
## racewhite -0.10460764 0.06026347 -1.7358 0.082612 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# "Prestigious" authors: names appearing in all three years
# (chained inner joins on the author name).
authors_p = inner_join(ICLR_2017_authors[c('year','authors','forum')],ICLR_2018[c('year','authors','forum')], by ='authors')
authors_p = inner_join(authors_p, ICLR_2019[c('year','authors','forum')], by ='authors')
authors_p = unique(authors_p)
authors_p = tidyr::extract(authors_p,authors,c("First_Name","Last_Name"), "([^ ]+) (.*)")
authors_p = na.omit(authors_p)
# Re-predict race for this subset; column 7 is kept (presumably the label).
authors_p['race'] = predict_ethnicity(firstnames = authors_p$First_Name, lastnames = authors_p$Last_Name, method = "fullname")[7]
authors_p = authors_p %>% dplyr::select(First_Name, Last_Name, race)
# Re-attach the per-paper rows (and the race column) from the main authors
# table, keeping the race from `authors` (race.y) over the local one (race.x).
authors_p = left_join(authors_p,authors, by =c("First_Name","Last_Name"))
authors_p = authors_p %>% dplyr::select(-race.x)
authors_p = unique(authors_p)
authors_p = dplyr::rename(authors_p, race=race.y)
# Race distribution among prestigious authors.
df = authors_p %>% group_by(race) %>%
dplyr::summarise(n = n()) %>%
mutate(freq = round(n / sum(n ), 3))
df
# Flag reviews of papers with at least one prestigious author.
prestigious_forums = unique(authors_p$forum)
ICLR_reviews = ICLR_reviews %>% mutate (presti_paper = ifelse(forum %in% prestigious_forums, 1, 0))
#only prestigious authors
# NOTE(review): rm() of a not-yet-existing object warns; consider
# if (exists("ICLR_analysis_ARIP")) rm(ICLR_analysis_ARIP).
rm(ICLR_analysis_ARIP)
## Warning in rm(ICLR_analysis_ARIP): object 'ICLR_analysis_ARIP' not found
# Same author-review construction as before, restricted to prestigious
# authors (authors_p) instead of all authors.
ICLR_analysis_ARIP = ICLR_reviews %>% filter(year %in% c(2017,2018))
ICLR_analysis_ARIP = full_join(x=ICLR_analysis_ARIP,y=authors_p,by="forum")
#data audit
#ICLR_reviews %>% filter(forum == 'B1-q5Pqxl')
#ICLR_analysis_ARIP %>% filter(forum == 'B1-q5Pqxl')
#authors_p %>% filter(forum == 'B1-q5Pqxl')
# Treatment W: 0 for 2017 reviews, 1 for 2018 reviews.
ICLR_analysis_ARIP = ICLR_analysis_ARIP %>% mutate(W = if_else(year.x==2017,0,1))
ICLR_analysis_ARIP = na.omit(ICLR_analysis_ARIP)
ICLR_analysis_ARIP = unique(ICLR_analysis_ARIP)
ICLR_analysis_ARIP%>% group_by(W) %>% summarise(n())
#auditing data to see if correct
#authors %>% filter(First_Name=="Angeliki", Last_Name=="Lazaridou")
#authors_p %>% filter(First_Name=="Angeliki", Last_Name=="Lazaridou")
#ICLR_2017_authors %>% dplyr::select(forum, year, authors)%>% filter(authors == "Angeliki Lazaridou") %>% unique()
#ICLR_2018 %>% dplyr::select(forum, year, authors)%>% filter(authors=="Angeliki Lazaridou") %>% unique()
#ICLR_2019 %>% dplyr::select(forum, year, authors)%>% filter(authors=="Angeliki Lazaridou") %>% unique()
# Rebuild the full ARI table — ICLR_reviews now carries presti_paper.
ICLR_analysis_ARI = ICLR_reviews %>% filter(year %in% c(2017,2018))
ICLR_analysis_ARI = full_join(x=ICLR_analysis_ARI,y=authors,by="forum")
ICLR_analysis_ARI = ICLR_analysis_ARI %>% mutate(W = if_else(year.x==2017,0,1))
ICLR_analysis_ARI = na.omit(ICLR_analysis_ARI)
ICLR_analysis_ARI = unique(ICLR_analysis_ARI)
# Heterogeneous effects by paper prestige: AIPW scores from a causal forest,
# then group means via OLS with Romano-Wolf adjusted p-values.
set.seed(1)
group = 'presti_paper'
covariates = c("confidence_number","review_length_quantile", "race",'presti_paper')
fmla <- formula(paste0("~ 0 +", paste0(covariates, collapse="+")))
XX <- model.matrix(fmla, ICLR_analysis_ARI)
W=ICLR_analysis_ARI[,"W"]
Y=ICLR_analysis_ARI[,"rating_number"]
forest.tau <- causal_forest(XX, Y, W)
tau.hat <- predict(forest.tau)$predictions
m.hat <- forest.tau$Y.hat # E[Y|X] estimates
e.hat <- forest.tau$W.hat # e(X) := E[W|X] estimates (or known quantity)
# NOTE(review): overwrites the predict() result above with the stored
# in-sample predictions — confirm both refer to the same quantity.
tau.hat <- forest.tau$predictions # tau(X) estimates
# Predicting mu.hat(X[i], 1) and mu.hat(X[i], 0) for obs in held-out sample
# Note: to understand this, read equations 6-8 in this vignette
# https://grf-labs.github.io/grf/articles/muhats.html
mu.hat.0 <- m.hat - e.hat * tau.hat # E[Y|X,W=0] = E[Y|X] - e(X)*tau(X)
mu.hat.1 <- m.hat + (1 - e.hat) * tau.hat # E[Y|X,W=1] = E[Y|X] + (1 - e(X))*tau(X)
# Compute AIPW scores
aipw.scores <- tau.hat + W / e.hat * (Y - mu.hat.1) - (1 - W) / (1 - e.hat) * (Y - mu.hat.0)
# Estimate average treatment effect conditional on group membership
fmla <- formula(paste0('aipw.scores ~ factor(', group, ')'))
ols <- lm(fmla, data=transform(ICLR_analysis_ARI[covariates], aipw.scores=aipw.scores))
summary_rw_lm(ols)
## Estimate Std. Error Orig. p-value Adj. p-value
## (Intercept) -0.1662739 0.02953447 1.834082e-08 0.0000
## factor(presti_paper)1 -0.1297235 0.06231457 3.738088e-02 0.0346
# Omnibus heterogeneity check.
test_calibration(forest.tau)
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 1.002609 0.128767 7.7862 3.661e-15 ***
## differential.forest.prediction 0.838857 0.083417 10.0562 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Jittered covariate scatterplots for the prestigious-author subset.
# NOTE(review): as in the earlier plots, the xlab/ylab here do not match the
# model.matrix column order (X[,1]=review_length_quantile by formula order),
# and ifelse(Y, ...) coerces the numeric rating to logical — confirm intent.
X <- model.matrix(formula("~ 0 + review_length_quantile + confidence_number"), ICLR_analysis_ARIP)
W <- ICLR_analysis_ARIP$W
Y <- ICLR_analysis_ARIP$rating_number
par(mfrow=c(1,2))
for (w in c(0, 1)) {
plot(X[W==w,1] + rnorm(n=sum(W==w), sd=.1), X[W==w,2] + rnorm(n=sum(W==w), sd=.1),
pch=ifelse(Y, 23, 21), cex=1, col=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)),
bg=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)), main=ifelse(w, "Treated", "Untreated"), xlab="confidence_number", ylab="review_length_quantile",ylim=c(0,5))
}

# NOTE(review): race is a factor; X[,2] below is a single race dummy column.
X <- model.matrix(formula("~ 0 + confidence_number + race"), ICLR_analysis_ARIP)
W <- ICLR_analysis_ARIP$W
Y <- ICLR_analysis_ARIP$rating_number
par(mfrow=c(1,2))
for (w in c(0, 1)) {
plot(X[W==w,1] + rnorm(n=sum(W==w), sd=.1), X[W==w,2] + rnorm(n=sum(W==w), sd=.1),
pch=ifelse(Y, 23, 21), cex=1, col=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)),
bg=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)), main=ifelse(w, "Treated", "Untreated"), xlab="confidence_number", ylab="race")
}

X <- model.matrix(formula("~ 0 + review_length_quantile + race"), ICLR_analysis_ARIP)
W <- ICLR_analysis_ARIP$W
Y <- ICLR_analysis_ARIP$rating_number
par(mfrow=c(1,2))
for (w in c(0, 1)) {
plot(X[W==w,1] + rnorm(n=sum(W==w), sd=.1), X[W==w,2] + rnorm(n=sum(W==w), sd=.1),
pch=ifelse(Y, 23, 21), cex=1, col=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)),
bg=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)), main=ifelse(w, "Treated", "Untreated"), xlab="review_length_quantile", ylab="race")
}

#names(ICLR_analysis)
#c("rating","confidence","review","rating number","confidence number","review length","First_Name","Last_Name","race")
# Causal forest ATE on the prestigious-author subset.
covariates = c("confidence_number","review_length_quantile", "race")
XX <- model.matrix(formula(paste0("~", paste0(covariates, collapse="+"))), data=ICLR_analysis_ARIP)
set.seed(1)
forest <- causal_forest(
X=XX,
W=ICLR_analysis_ARIP[,"W"],
Y=ICLR_analysis_ARIP[,"rating_number"]
)
forest.ate <- average_treatment_effect(forest)
forest.ate
## estimate std.err
## -0.2240945 0.1054972
# Overlap check on the estimated propensity scores.
hist(forest$W.hat, main="Estimated propensity scores \n(causal forest with )", xlim=c(-.1, 1.1))

# Heterogeneous effects by race on the prestigious-author subset:
# AIPW scores from a causal forest, then group means via OLS with
# Romano-Wolf adjusted p-values.
set.seed(1)
group = 'race'
fmla <- formula(paste0("~ 0 +", paste0(covariates, collapse="+")))
XX <- model.matrix(fmla, ICLR_analysis_ARIP)
W=ICLR_analysis_ARIP[,"W"]
Y=ICLR_analysis_ARIP[,"rating_number"]
forest.tau <- causal_forest(XX, Y, W)
tau.hat <- predict(forest.tau)$predictions
m.hat <- forest.tau$Y.hat # E[Y|X] estimates
e.hat <- forest.tau$W.hat # e(X) := E[W|X] estimates (or known quantity)
# NOTE(review): overwrites the predict() result above with the stored
# in-sample predictions — confirm both refer to the same quantity.
tau.hat <- forest.tau$predictions # tau(X) estimates
# Predicting mu.hat(X[i], 1) and mu.hat(X[i], 0) for obs in held-out sample
# Note: to understand this, read equations 6-8 in this vignette
# https://grf-labs.github.io/grf/articles/muhats.html
mu.hat.0 <- m.hat - e.hat * tau.hat # E[Y|X,W=0] = E[Y|X] - e(X)*tau(X)
mu.hat.1 <- m.hat + (1 - e.hat) * tau.hat # E[Y|X,W=1] = E[Y|X] + (1 - e(X))*tau(X)
# Compute AIPW scores
aipw.scores <- tau.hat + W / e.hat * (Y - mu.hat.1) - (1 - W) / (1 - e.hat) * (Y - mu.hat.0)
# Estimate average treatment effect conditional on group membership
fmla <- formula(paste0('aipw.scores ~ factor(', group, ')'))
ols <- lm(fmla, data=transform(ICLR_analysis_ARIP[covariates], aipw.scores=aipw.scores))
summary_rw_lm(ols)
## Estimate Std. Error Orig. p-value Adj. p-value
## (Intercept) -0.12462750 0.1401247 0.3740249 0.7200
## factor(race)black -0.59928829 0.4313341 0.1650618 0.4836
## factor(race)hispanic -0.30047809 0.3540865 0.3963304 0.7200
## factor(race)white -0.08275372 0.2544635 0.7450992 0.7458
# Omnibus heterogeneity check.
test_calibration(forest.tau )
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 0.93921 0.49229 1.9078 0.02837 *
## differential.forest.prediction -0.81826 0.58163 -1.4068 0.92009
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Collapse reviews to one row per submission (forum): average rating, review
# length and confidence, plus per-race author counts.
ICLR_analysis_Submission = ICLR_reviews
ICLR_analysis_Submission = ICLR_analysis_Submission %>% group_by(forum, year) %>%
  summarise(ave_rating = mean(rating_number), ave_review_len = mean(review_length), ave_confidence_number = mean(confidence_number))
ICLR_analysis_Submission = full_join(x=ICLR_analysis_Submission,y=authors,by="forum")
ICLR_analysis_Submission = na.omit(ICLR_analysis_Submission)
# One indicator column per predicted race (race_asian, race_black, ...).
ICLR_analysis_Submission = dummy_cols(ICLR_analysis_Submission, select_columns = "race")
# NOTE: this summarise() returns several identical rows per forum (the
# per-forum columns are repeated, not aggregated); unique() collapses them.
ICLR_analysis_Submission = ICLR_analysis_Submission %>% group_by(forum) %>%
  summarise(year = year.x,
            ave_review_len = ave_review_len,
            ave_rating=ave_rating,
            ave_confidence_number = ave_confidence_number,
            asian_authors = sum(race_asian),
            black_authors = sum(race_black),
            hispanic_authors = sum(race_hispanic),
            white_authors = sum(race_white),
            total_authors = sum(race_asian,race_black,race_hispanic,race_white)) %>%
  unique()
ICLR_analysis_Submission = na.omit(ICLR_analysis_Submission)
#ICLR_analysis_Submission
# Treatment W: 0 for 2017 submissions, 1 otherwise.
ICLR_analysis_Submission = ICLR_analysis_Submission %>% mutate(W = if_else(year==2017,0,1))
ICLR_analysis_Submission$ave_review_length_quantile = ntile(ICLR_analysis_Submission$ave_review_len, 4)
# Majority racial group among a paper's authors.
# Fix: max.col() defaults to ties.method="random", which made racial_majority
# NONDETERMINISTIC when counts tie; "first" makes it reproducible (ties go to
# the earlier column: asian, then black, hispanic, white).
race_count_cols <- c( "asian_authors","black_authors", "hispanic_authors","white_authors")
ICLR_analysis_Submission$racial_majority = race_count_cols[max.col(ICLR_analysis_Submission[,race_count_cols], ties.method = "first")]
#ICLR_analysis_Submission %>% filter(First_Name=='Jake')
# Flag submissions with at least one prestigious author.
prestigious_forums = unique(authors_p$forum)
ICLR_analysis_Submission = ICLR_analysis_Submission %>% mutate (presti_paper = ifelse(forum %in% prestigious_forums, 1, 0))
# Spot-check two forums and the quartile distribution.
ICLR_analysis_Submission %>% filter(forum=='B1-Hhnslg')
ICLR_analysis_Submission %>% filter(forum=='Hk8N3Sclg')
hist(ICLR_analysis_Submission$ave_review_length_quantile)

names(ICLR_analysis_Submission)
## [1] "forum" "year"
## [3] "ave_review_len" "ave_rating"
## [5] "ave_confidence_number" "asian_authors"
## [7] "black_authors" "hispanic_authors"
## [9] "white_authors" "total_authors"
## [11] "W" "ave_review_length_quantile"
## [13] "racial_majority" "presti_paper"
# Submission-level causal forest, 2017 vs 2018.
ICLR_analysis_Submission_1718 = ICLR_analysis_Submission %>% filter(year %in% c(2017,2018))
ICLR_analysis_Submission_1718 = as.data.frame(ICLR_analysis_Submission_1718)
names(ICLR_analysis_Submission_1718)
## [1] "forum" "year"
## [3] "ave_review_len" "ave_rating"
## [5] "ave_confidence_number" "asian_authors"
## [7] "black_authors" "hispanic_authors"
## [9] "white_authors" "total_authors"
## [11] "W" "ave_review_length_quantile"
## [13] "racial_majority" "presti_paper"
covariates = c("ave_confidence_number","ave_review_length_quantile", "asian_authors","black_authors", "hispanic_authors","white_authors")
XX <- model.matrix(formula(paste0("~", paste0(covariates, collapse="+"))), data=ICLR_analysis_Submission_1718)
set.seed(1)
forest <- causal_forest(
X=XX,
W=ICLR_analysis_Submission_1718[,"W"],
Y=ICLR_analysis_Submission_1718[,"ave_rating"]
)
forest.ate <- average_treatment_effect(forest)
forest.ate
## estimate std.err
## -0.25257828 0.07744789
# Overlap check on the estimated propensity scores.
hist(forest$W.hat, main="Estimated propensity scores \n(causal forest with submission data)", xlim=c(-.1, 1.1))

# Same 2017-vs-2018 forest, restricted to prestigious-author submissions.
ICLR_analysis_Submission_p_1718 = ICLR_analysis_Submission_1718 %>% filter(presti_paper == 1)
covariates = c("ave_confidence_number","ave_review_length_quantile", "asian_authors","black_authors", "hispanic_authors","white_authors")
XX <- model.matrix(formula(paste0("~", paste0(covariates, collapse="+"))), data=ICLR_analysis_Submission_p_1718)
set.seed(1)
forest <- causal_forest(
X=XX,
W=ICLR_analysis_Submission_p_1718[,"W"],
Y=ICLR_analysis_Submission_p_1718[,"ave_rating"]
#,num.trees = 100
)
forest.ate <- average_treatment_effect(forest)
forest.ate
## estimate std.err
## -0.3084228 0.1432176
# Overlap check on the estimated propensity scores.
hist(forest$W.hat, main="Estimated propensity scores \n(causal forest with submission data)", xlim=c(-.1, 1.1))

# Placebo-style comparison: 2018 vs 2019 (W: 0 for 2018, 1 for 2019).
ICLR_analysis_Submission_1819 = ICLR_analysis_Submission %>% filter(year %in% c(2019,2018))
ICLR_analysis_Submission_1819 = ICLR_analysis_Submission_1819 %>% mutate(W = if_else(year==2018,0,1))
ICLR_analysis_Submission_1819 = as.data.frame(ICLR_analysis_Submission_1819)
covariates = c("ave_confidence_number","ave_review_length_quantile", "asian_authors","black_authors", "hispanic_authors","white_authors")
XX <- model.matrix(formula(paste0("~", paste0(covariates, collapse="+"))), data=ICLR_analysis_Submission_1819)
set.seed(1)
forest <- causal_forest(
X=XX,
W=ICLR_analysis_Submission_1819[,"W"],
Y=ICLR_analysis_Submission_1819[,"ave_rating"]
#,num.trees = 100
)
#what does the target sample mean and what I should use
forest.ate <- average_treatment_effect(forest)
#forest.ate <- average_treatment_effect(forest, target.sample="control")
forest.ate
## estimate std.err
## -0.009684859 0.052313303
# Propensity-score and doubly-robust-score distributions.
hist(forest$W.hat)

hist(get_scores(forest))

## 2018-vs-2019 comparison restricted to prestigious-institution papers.
## The 1819 frame is rebuilt here so this chunk can run on its own.
ICLR_analysis_Submission_1819 <- ICLR_analysis_Submission %>%
  filter(year %in% c(2019, 2018)) %>%
  mutate(W = if_else(year == 2018, 0, 1)) %>%
  as.data.frame()
ICLR_analysis_Submission_p_1819 <- ICLR_analysis_Submission_1819 %>%
  filter(presti_paper == 1)
covariates <- c(
  "ave_confidence_number", "ave_review_length_quantile",
  "asian_authors", "black_authors", "hispanic_authors", "white_authors"
)
XX <- model.matrix(reformulate(covariates), data = ICLR_analysis_Submission_p_1819)
set.seed(1)
forest <- causal_forest(
  X = XX,
  W = ICLR_analysis_Submission_p_1819[, "W"],
  Y = ICLR_analysis_Submission_p_1819[, "ave_rating"]
)
# AIPW ATE over the whole subsample (target.sample = "control" would give
# the average effect on the controls instead).
forest.ate <- average_treatment_effect(forest)
forest.ate
## estimate std.err
## 0.02085575 0.12526899
# Diagnostics: estimated propensities and per-observation AIPW scores.
hist(forest$W.hat)

hist(get_scores(forest))

set.seed(1)
## Group-level treatment effects on the 2017/2018 sample: estimate the ATE
## within each racial-majority group using AIPW (doubly robust) scores
## derived from a causal forest.
if (exists("HTE_Submission")) rm(HTE_Submission)  # guarded: bare rm() warned when the object was absent
group = 'racial_majority'
covariates = c("ave_confidence_number","ave_review_length_quantile", 'racial_majority','presti_paper')
XX <- model.matrix(formula(paste0("~", paste0(covariates, collapse="+"))), data=ICLR_analysis_Submission_1718)
W=ICLR_analysis_Submission_1718[,"W"]
Y=ICLR_analysis_Submission_1718[,"ave_rating"]
forest.tau <- causal_forest(XX, Y, W)
# Out-of-bag CATE estimates tau(X). (The original assigned tau.hat twice --
# once via predict() and once via forest.tau$predictions, which hold the same
# OOB predictions; the dead duplicate assignment was removed.)
tau.hat <- predict(forest.tau)$predictions
m.hat <- forest.tau$Y.hat # E[Y|X] estimates
e.hat <- forest.tau$W.hat # e(X) := E[W|X] estimates (or known quantity)
# Reconstruct mu.hat(X, 0) and mu.hat(X, 1) from m.hat, e.hat, tau.hat.
# Note: to understand this, read equations 6-8 in this vignette
# https://grf-labs.github.io/grf/articles/muhats.html
mu.hat.0 <- m.hat - e.hat * tau.hat # E[Y|X,W=0] = E[Y|X] - e(X)*tau(X)
mu.hat.1 <- m.hat + (1 - e.hat) * tau.hat # E[Y|X,W=1] = E[Y|X] + (1 - e(X))*tau(X)
# Compute AIPW scores: their mean is a doubly robust ATE estimate.
aipw.scores <- tau.hat + W / e.hat * (Y - mu.hat.1) - (1 - W) / (1 - e.hat) * (Y - mu.hat.0)
# Regress the scores on group dummies: each coefficient is the group's ATE
# contrast against the baseline level (asian_authors, alphabetically first).
fmla <- formula(paste0('aipw.scores ~ factor(', group, ')'))
ols <- lm(fmla, data=transform(ICLR_analysis_Submission_1718[covariates], aipw.scores=aipw.scores))
summary_rw_lm(ols)  # OLS summary with Romano-Wolf adjusted p-values (helper defined elsewhere)
## Estimate Std. Error Orig. p-value
## (Intercept) -0.1197922 0.1067461 0.2619638
## factor(racial_majority)black_authors 0.2259062 0.3095470 0.4656374
## factor(racial_majority)hispanic_authors -0.1934307 0.3473473 0.5776989
## factor(racial_majority)white_authors -0.2425740 0.1877978 0.1966833
## Adj. p-value
## (Intercept) 0.5812
## factor(racial_majority)black_authors 0.7184
## factor(racial_majority)hispanic_authors 0.7184
## factor(racial_majority)white_authors 0.5442
# Overlap diagnostic for the forest's internal propensity model.
hist(forest.tau$W.hat)

# Omnibus calibration test (grf::test_calibration): a mean.forest.prediction
# coefficient near 1 says the average prediction is well calibrated; a
# significantly positive differential.forest.prediction coefficient would be
# evidence of real heterogeneity. Here that coefficient is negative and
# insignificant, so the forest detects no reliable effect heterogeneity.
test_calibration(forest.tau)
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 1.05417 0.32581 3.2355 0.0006214 ***
## differential.forest.prediction -0.68608 0.46004 -1.4914 0.9319540
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Jittered covariate scatter, one panel per treatment arm, saved to PNG.
X <- model.matrix(formula("~ 0 + ave_review_length_quantile + ave_confidence_number"), ICLR_analysis_Submission_1718)
W <- ICLR_analysis_Submission_1718$W
Y <- ICLR_analysis_Submission_1718$ave_rating
png(file="Submission_covariates_confidence_review_len.png")
par(mfrow=c(1,2))
for (w in c(0, 1)) {
# Fixed: axis labels were swapped relative to the plotted columns
# (column 1 of X is ave_review_length_quantile, column 2 is ave_confidence_number).
# NOTE(review): ifelse(Y, ...) coerces the numeric rating to logical, so every
# nonzero rating gets the same symbol/colour, and Y is not subset to W==w;
# if colouring by outcome was intended, use e.g. Y[W==w] > median(Y). Left
# unchanged here to preserve the recorded figure.
plot(X[W==w,1] + rnorm(n=sum(W==w), sd=.1), X[W==w,2] + rnorm(n=sum(W==w), sd=.1),
pch=ifelse(Y, 23, 21), cex=1, col=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)),
bg=ifelse(Y, rgb(1,0,0,1/4), rgb(0,0,1,1/4)), main=ifelse(w, "Treated", "Untreated"), xlab="ave_review_length_quantile", ylab="ave_confidence_number")
}
# Close the PNG device: without dev.off() the file is never finalized and
# every subsequent plot in the script is silently captured into it.
dev.off()
## Jittered scatter of reviewer confidence vs. team size, per treatment arm.
X <- model.matrix(formula("~ 0 + ave_confidence_number + total_authors"), ICLR_analysis_Submission_1718)
W <- ICLR_analysis_Submission_1718$W
Y <- ICLR_analysis_Submission_1718$ave_rating
par(mfrow = c(1, 2))
for (arm in c(0, 1)) {
  in_arm <- W == arm
  # Small Gaussian jitter so overlapping discrete points are visible.
  x_jit <- X[in_arm, 1] + rnorm(n = sum(in_arm), sd = .1)
  y_jit <- X[in_arm, 2] + rnorm(n = sum(in_arm), sd = .1)
  # NOTE(review): ifelse(Y, ...) coerces the rating to logical (nonzero -> TRUE),
  # so all points share one symbol/colour, and Y is full-length rather than
  # Y[in_arm] -- confirm whether colouring by outcome was intended.
  plot(x_jit, y_jit,
       pch = ifelse(Y, 23, 21), cex = 1,
       col = ifelse(Y, rgb(1, 0, 0, 1/4), rgb(0, 0, 1, 1/4)),
       bg = ifelse(Y, rgb(1, 0, 0, 1/4), rgb(0, 0, 1, 1/4)),
       main = ifelse(arm, "Treated", "Untreated"),
       xlab = "confidence_number", ylab = "total_authors")
}

set.seed(1)
## Same group-ATE analysis via AIPW scores, restricted to papers from
## prestigious institutions (presti_paper == 1).
if (exists("HTE_Submission")) rm(HTE_Submission)  # guarded: bare rm() warned when the object was absent
group = 'racial_majority'
covariates = c("ave_confidence_number","ave_review_length_quantile", 'racial_majority')
ICLR_analysis_Submission_p_1718 = ICLR_analysis_Submission_1718 %>% filter(presti_paper ==1)
# NOTE(review): unclass(as.factor(.)) replaces the group labels with integer
# codes (alphabetical levels: 1=asian_authors, 2=black_authors,
# 3=hispanic_authors, 4=white_authors), which is why the regression output
# below shows factor(racial_majority)2/3/4. Keeping the factor would give
# readable coefficient names; left as-is to match the recorded output.
ICLR_analysis_Submission_p_1718$racial_majority = as.factor(ICLR_analysis_Submission_p_1718[,'racial_majority'])
ICLR_analysis_Submission_p_1718$racial_majority = unclass(ICLR_analysis_Submission_p_1718$racial_majority)
XX <- model.matrix(formula(paste0("~", paste0(covariates, collapse="+"))), data=ICLR_analysis_Submission_p_1718)
W=ICLR_analysis_Submission_p_1718[,"W"]
Y=ICLR_analysis_Submission_p_1718[,"ave_rating"]
forest.tau <- causal_forest(XX, Y, W)
# Out-of-bag CATE estimates tau(X). (The original assigned tau.hat twice --
# predict() and forest.tau$predictions hold the same OOB predictions; the
# dead duplicate assignment was removed.)
tau.hat <- predict(forest.tau)$predictions
m.hat <- forest.tau$Y.hat # E[Y|X] estimates
e.hat <- forest.tau$W.hat # e(X) := E[W|X] estimates (or known quantity)
# Reconstruct mu.hat(X, 0) and mu.hat(X, 1); see equations 6-8 in
# https://grf-labs.github.io/grf/articles/muhats.html
mu.hat.0 <- m.hat - e.hat * tau.hat # E[Y|X,W=0] = E[Y|X] - e(X)*tau(X)
mu.hat.1 <- m.hat + (1 - e.hat) * tau.hat # E[Y|X,W=1] = E[Y|X] + (1 - e(X))*tau(X)
# Compute AIPW (doubly robust) scores.
aipw.scores <- tau.hat + W / e.hat * (Y - mu.hat.1) - (1 - W) / (1 - e.hat) * (Y - mu.hat.0)
# Group ATEs: regress scores on group dummies (baseline = group 1).
fmla <- formula(paste0('aipw.scores ~ factor(', group, ')'))
ols <- lm(fmla, data=transform(ICLR_analysis_Submission_p_1718[covariates], aipw.scores=aipw.scores))
summary_rw_lm(ols)  # Romano-Wolf adjusted p-values (helper defined elsewhere)
## Estimate Std. Error Orig. p-value Adj. p-value
## (Intercept) -0.166285939 0.2008648 0.40851462 0.7332
## factor(racial_majority)2 -0.216384594 0.6025944 0.71982048 0.9185
## factor(racial_majority)3 -1.469848622 0.7540035 0.05232372 0.1658
## factor(racial_majority)4 0.004874355 0.3227878 0.98796334 0.9872
# Overlap diagnostic for the forest's internal propensity model.
hist(forest.tau$W.hat)

# Calibration test for the prestigious-subsample forest. The large negative
# differential.forest.prediction coefficient (one-sided p = 1) means the
# forest's heterogeneity signal is not calibrated on this small subsample;
# the group contrasts above should be read with that caveat.
test_calibration(forest.tau)
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 1.13234 0.47584 2.3797 0.009022 **
## differential.forest.prediction -12.99434 2.21407 -5.8690 1.000000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1